 /* 
PROGRAM BY: JEFFREY WANG
Last edited 10-10-2018
            
This program cleans the 2007 CMF, matches with LBD, and generates material indicator to be used for CMF-MAT matching

    
Output is cmf_lbd2007 which includes all CMF plants with LBD ID, and positive emp + sales from LBD

Also outputs estabs2007_clean which does further cleaning and adds matcfn = 1 for firms with usable mat data
*/

/* Remove any cap on the number of observations processed per step. */
options obs=max;


/* Pull in external setup code; its contents are not visible from this file. */
%include 'yyyy/pdata.sas';
%let lbdvers= _c201600 ; /* used for lbd and for fknaics */
        
/* Issued a second time after the %include -- presumably to undo any OBS=
   setting made by the included file; TODO confirm before removing. */
options obs=max;
    

/* Library that holds the all_ec input and receives estabs_all_clean. */
libname ecroot 'xxxx/data';

/*
   Build WORK.estabs_all from ecroot.all_ec, keeping only rows that are
     - CMF records (source = "cmf"),
     - with sales >= 0 and emp >= 0 (numeric missing values fail this
       comparison, so such rows are dropped as well),
     - from 1992 onwards.
*/
data estabs_all;
    set ecroot.all_ec;

    /* single subsetting IF: a row is kept only when every condition holds */
    if  source = "cmf"
        and sales ge 0
        and emp   ge 0
        and year  ge 1992;
run;




* for 2002 - 2012: identify firms that actually received the long form;
* for 1992 - 1997: keep all obs for now;
/*
   Build ecroot.estabs_all_clean from WORK.estabs_all, adding two
   one-character flags:

     shortform  "1" when the 4th character of form_mailed is "D", else "0".
     matcfn     "1" when the plant passes the screen below; left blank
                otherwise.

   Screen for matcfn = "1" (all must hold):
     - source = "cmf"
     - ar ~= 1
     - shortform is not "1"
     - year ~= 2007, OR the plant's mailflg is one of the accepted codes
       for its survu_type ("MU" or "SU").

   The flags are declared as $1, so they are assigned and compared as
   character literals; this avoids implicit numeric/character conversion.

   NOTE(review): an unconditional "matcfn = 1" used to follow the screen and
   overwrote its result for every row. It has been removed so the flag
   reflects the screen. Confirm downstream code does not rely on matcfn
   being 1 for all rows.
*/
data ecroot.estabs_all_clean;
    length shortform $ 1 matcfn $ 1;
    set estabs_all;

    * 'D' stands in for redacted values ;
    if substr(form_mailed,4,1)="D" then shortform = "1";
    else shortform = "0";

    /* Accepted mailflg codes: "D" is a redaction placeholder. The MU list
       originally held blank plus five redacted codes, and the SU list three
       redacted codes plus blank; since every placeholder is the same
       literal, each list is written once here. Restore the real codes
       before relying on this screen. */
    if source="cmf" & ar~=1 & shortform~="1" &
        ((survu_type="MU" & (mailflg=" " | mailflg="D")) |
         (survu_type="SU" & (mailflg="D" | mailflg=" ")) |
         year ~= 2007) then matcfn = "1";
run;
